####################################
#Diff-in-diff with matching and dual-distance cutoffs - PA figure, dd with dd 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
#76/276/70, 476: HAS ez-passes  
#79, 80, 81, : NO EZPass 
####################################

########################
#Initial setup 
########################
#set wd
setwd("~/Dropbox/Collaborations/Traffic/project_analysis")

#external libraries 
library(ggplot2)

##############################
#set variable values
##############################

#load in data 
#NJ_data <- read.csv("NJ_precinct_master.csv", header=T)
PA_data <- read.csv("PA_precinct_master.csv", header=T)

#set general specifications for later modifications
my_data <- PA_data 

#outcomes 
outcome_time1_name <- "USPP2000"
outcome_time2_name <- "USPP2004"  #US president (dem) percent 2000, 2004

#identify matching variables 
#matching_variables <- c("cepctwhite", "cepctblack", "cepctwomen", "cnipoverty", "cnpopurban") #income, commute length, number of cars, etc.
matching_variables <- c("census_2000_purban", "census_2000_pblack", "census_2000_punemployed") #"census_2000_pfam_belowpoverty", "census_2000_pworkers_Commute_less9") #income, commute length, number of cars, etc. 

#treated basis (something having to do with E-ZPasses)
#treatment_basis_name <- "EZDist_PA_NEAR_DIST" 
#treatment_basis_name <- "EZPlaza_TravelDistOptimizedDist_PA_Total_Time"
treatment_basis_name <- "EZPlaza_TravelDistOptimizedDist_PA_Total_Leng"

#control basis (something having to do with non-E-ZPass highway)
#control_basis_name <- c("minDist_nonEZ_hwy")
control_basis_name <- c("minNetworkOptimizedDistance_Distance_nonEZ_hwy")

#define cutoffs 
##############################
#loop to generate figure 
##############################
max_length_out <- 5; min_length_out <- 5;
#max_seq <- seq(250, 3000, length.out = max_length_out)#crow
#min_seq <- seq(250, 38000, length.out = min_length_out)#crow

##giver of good results 
max_seq <- seq(5, 14, length.out = max_length_out)
min_seq <- seq(14, 36.7, length.out = min_length_out)

##test
#max_seq <- seq(2, 60, length.out = max_length_out)
#min_seq <- seq(15, 60, length.out = min_length_out)

BIG_STORAGE_LIST <- list()
BIG_BALANCE_LIST <- list()
source("./causal_effect_estimation/dd_with_matchBoot_function.R"); counter1 <- 0 
for(j in min_seq)
{ 
  counter1 <- counter1 + 1 
    results_list <- list(); balance_list <- list()
    counter2 <- 0; for(i in max_seq)
    { 
      counter2 <- counter2 + 1
      print(counter2)
      RESULT <- dd_with_dd(my_data=my_data,
                                  treatment_basis_name=treatment_basis_name, 
                                  control_basis_name=control_basis_name, 
                                  outcome_time1_name=outcome_time1_name, 
                                  outcome_time2_name=outcome_time2_name,
                                  matching_variables=matching_variables,
                                  treatment_basis_treated_max=i, 
                                  treatment_basis_control_min=j, 
                                  control_basis_control_max=i,
                                  control_basis_treated_min=j, 
                                  boostrap_SDs=T, 
                                  bootstrap_numb=100, 
                                  conf_level=0.99, 
                                  my_method = "nearest", 
                                  my_replace=T, 
                                  start_browser=F)
      results_list[[counter2]] <- unlist(RESULT[[1]])
      balance_list[[counter2]] <- RESULT[[2]]
    }
    BIG_STORAGE_LIST[[counter1]] <- results_list
    BIG_BALANCE_LIST[[counter1]] <- balance_list
}

####################
#save and plot results
####################
#save results as appropriate objects 
results_list <- unlist(BIG_STORAGE_LIST, recursive=F, use.names=T)
index_seq <- seq(1, max_length_out*min_length_out)
boot_used <- T
#match_name <- "nearest_crowflies"
match_name <- "nearest_NetDistMinDist_PA"
#match_name <- "nearest_NetDistMinTime"
for(index in 1:min_length_out)
{
  start <- max_length_out*(index-1)+1
  end <- max_length_out*(index)
  
  internal_list <- results_list[start:end]
  
  unlisted_results <- unlist(internal_list, recursive=F, use.names=F) 
  base_results <- unlist(unlisted_results[(1:length(unlisted_results))%%4==1])
  upper <- unlist(unlisted_results[(1:length(unlisted_results))%%4==2])
  lower <- unlist(unlisted_results[(1:length(unlisted_results))%%4==3])
  match_numbs <- unlist(unlisted_results[(1:length(unlisted_results))%%4==0])
  
  gg_data <- data.frame(cbind(max_seq, base_results, lower, upper, match_numbs))
  
  if(boot_used==T)
  { 
  g <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
       geom_point() + 
       geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
       geom_hline(yintercept = 0, line_type="dashed") + 
       geom_errorbar(aes(ymin=lower, ymax=upper), width=0.2) + 
       ylab("DiD effect (in % points)") + 
       xlab("Maximum distance from E-ZPass or Control Highway") + 
       ggtitle("Maximum Distance vs. DiD Effect (in % points) for PA") + 
       theme(text = element_text(size=13)) 
  ggsave(g, file=paste(match_name, sprintf("%i.png", index), sep=""))
  }
  else
  { 
    g <- ggplot(gg_data, aes(x=max_seq, y=base_results)) + 
        geom_point() + 
        geom_hline(yintercept = 0, line_type="dashed") + 
        ylab("DiD effect (in % points)") + 
        xlab("Maximum distance from E-ZPass or Control Highway") + 
        ggtitle("Maximum Distance vs. DiD Effect (in % points) for PA") + 
        theme(text = element_text(size=13)) 
  ggsave(g, file=paste("./causal_effect_estimation/", paste(match_name, sprintf("%i.png", index), sep=""), sep=""))
  }
}